* Load the final panel data set.
* A forward slash is used as path separator (as in all other paths of this file);
* it works on every operating system Stata runs on.
use "${FINALDATA}/immo_panel.dta", clear
*************************************************
* generate codebook
************************************************* 
* The codebook log is only written when LOGTIME == 1. In that case a log that is
* still open is closed first, because -log using- fails if a log is already open.
if ($LOGTIME == 1) {
	cap log close
	log using "${LOG}/codebook_immo_panel.log", replace
}
	codebook, compact
* Close the codebook log only if it was opened above; an unconditional -log close-
* stops the do-file with r(606) when no log is open.
if ($LOGTIME == 1) log close


* Open the main log of the analyses (again only when LOGTIME == 1)
cap log close
if ($LOGTIME == 1) log using "${LOG}/${DATE}_analyses_price_effects.log", replace

***************************************
***************************************
* Analysis (panel data set)
***************************************
***************************************

*************************************************
* generate and recode variables
************************************************* 

* Flag the first and the last observation of every geo unit (ordered by time)
by geo (time), sort: gen geo1 = (_n == 1)
by geo (time), sort: gen geolast = (_n == _N)

* Define compliers:
* obs_geo = number of observations of the geo unit,
* epc_geo = sum of epc over the observations of the geo unit.
* A complier has a positive epc sum that differs from its number of observations.
by geo, sort: egen obs_geo = count(geo)
by geo, sort: egen epc_geo = total(epc)
gen complier = (obs_geo != epc_geo) & (epc_geo != 0)

* Code year of construction

* baujahr_kat2: merge category 2015 into category 2002, code missing values as 9999
recode baujahr_kat2 (2015 = 2002)
replace baujahr_kat2 = 9999 if baujahr_kat2 >= .

* baujahr_kat3: intervals [1900,1977), [1977,2002), [2002,2020) of baujahr,
* labelled by their lower bound; everything else (incl. missing) is coded 9999
egen baujahr_kat3 = cut(baujahr), at(1900,1977,2002,2020)
replace baujahr_kat3 = 9999 if baujahr_kat3 >= .

* baujahr_med: two quantile groups of baujahr, computed on the first observation
* of each geo unit only and then copied to all observations of that unit.
* Sorting by baujahr_med within geo puts the non-missing value first.
egen baujahr_med = cut(baujahr) if geo1 == 1, group(2)
bysort geo (baujahr_med): replace baujahr_med = baujahr_med[1]
replace baujahr_med = 9999 if baujahr_med >= .

* Coarser categories for year of construction and year of modernisation
gen baujahr_adopt = baujahr_kat
recode baujahr_adopt (1930 1950 1960 1970 = 1900) (1990 1995 = 1977) (2008 2014 = 2002)
gen modern_adopt = modern_kat
recode modern_adopt (1990 = 1977) (2008 2015 = 2002)

* Continuous number of rooms: set to missing where zimmer is missing, then add the square
replace zimmer_cont = . if zimmer == .
gen zimmer_cont2 = zimmer_cont^2

* Time of disclosure:
* time*epc_change is computed per observation; after sorting in descending order
* within geo, the largest value comes first and is copied to all observations
* of the geo unit.
gen epc_change_time = time*epc_change 
gsort geo -epc_change_time
bysort geo: replace epc_change_time = epc_change_time[1]

* epc_post equals epc, but is set to zero when the time of disclosure lies before May 2014
gen epc_post = epc
replace epc_post = 0 if epc_change_time < ym(2014, 5)


** clean eec variable
* keep a copy of the unmodified eec values
gen eec_total = eec

* Lower bounds of evk for the classes 1 to 9.
* Row 1 (0 to 30) is the class A+, row 9 (> 250) the class G;
* row 10 is missing and serves as open upper bound of the last class.
matrix evk_values = (0 \ 30 \ 50 \ 75 \ 100 \ 130 \ 160 \ 200 \ 250 \ .)

* When the class in eec does not match the interval evk falls into -> set eec to 99
forvalues cls = 1/9 {
	local upper = `cls' + 1
	replace eec = 99 if eec == `cls' & (evk < evk_values[`cls',1] | evk >= evk_values[`upper',1])
}

** recode evk_nomiss (subtract mean, /100)
capture drop evk_nomiss_rec
generate evk_nomiss_rec = evk_nomiss

* the mean is taken over the non-zero values of evk_nomiss only
summarize evk_nomiss if evk_nomiss != 0, detail
replace evk_nomiss_rec = evk_nomiss_rec - r(mean)
replace evk_nomiss_rec = 0 if epc == 0
* rescale so that units are more easily interpretable
replace evk_nomiss_rec = evk_nomiss_rec/100

generate evk_nomiss_rec2 = evk_nomiss_rec^2


* create 3 groups by year of construction
****************
* one dummy per category of baujahr_kat3 (bau_kat1, bau_kat2, ...)
tabulate baujahr_kat3, generate(bau_kat)

* generate matrix: lower bounds of the three construction periods
matrix bau_values = (1900 \ 1977 \ 2002)

* generate variables: name the dummies after their period and interact them
* with epc and with bankspost
forvalues k = 1/3 {
	local yr = bau_values[`k',1]
	rename bau_kat`k' bau_`yr'
	gen epc_bau_`yr' = epc*bau_`yr'
	gen bankspost_bau_`yr' = bankspost*bau_`yr'
}



**************************************************
* Globals for house characteristics
**************************************************

* categorical characteristics
global merkat baujahr_adopt modern_adopt objektkat qualitaet heizart

* the same list with the factor-variable prefix i. in front of every variable
global merkat_i ""
foreach v in $merkat {
	global merkat_i $merkat_i i.`v' 
}

* continuous characteristics
global merkont grundflaeche wohnflaeche zimmer_cont grundflaeche2 wohnflaeche2 



******************************************
******************************************
* Analyses
******************************************
******************************************

* OLS regression
*******************
* fixed effects for plz, estimated on the last observation of every geo unit
xtset plz
xtreg logkaufp epc $merkat_i $merkont ib1.objektzus ib5.anbietertyp i.time if geolast, fe vce(robust)
estimates store ols


* DiD regressions
*******************
* from here on geo is the panel variable and time the time variable
xtset geo time

* DiD: 
quietly xtreg logkaufp epc i.time, fe vce(cluster geo)
estimates store did

* DiD: heterogeneity by year of construction
quietly xtreg logkaufp epc epc_bau_1977 epc_bau_1900 i.time, fe vce(cluster geo)
estimates store did_bau



* IV regressions
*****************

* DiD: reduced form
quietly xtreg logkaufp bankspost i.time, fe vce(cluster geo)
estimates store did_rf

* DiD: first stage
quietly xtreg epc bankspost i.time, fe vce(cluster geo)
estimates store did_fs
* weak instrument check (refers to the first stage estimated directly above)
test bankspost = 0

* IV: Parsimonious Specification
quietly xtivreg logkaufp i.time (epc = bankspost), fe vce(cluster geo) first
estimates store iv

* IV: heterogeneity by year of construction
quietly xtivreg logkaufp i.time (epc_bau_1900 epc_bau_1977 epc = bankspost_bau_1900 bankspost_bau_1977 bankspost), fe vce(cluster geo)
estimates store iv_bau

	


********************************************************************************
* Table 4: Estimates of the Effect of Energy Information Disclosure on House Prices
********************************************************************************
* coefficients with stars and standard errors in parentheses, epc terms only
* (add style(tex) for LaTeX output)
estout ols did did_bau iv iv_bau, ///
	cells( "b(fmt(3) star) se(par fmt(3))" ) ///
	starlevels(* 0.05 ** 0.01) ///
	stats(N N_clust, fmt(%9.0fc %9.0fc)) ///
	keep(epc epc*) ///
	varwidth(12) substitute(_ \_)


********************************************************************************
* Table 7 (Online Appendix H): First-Stage and Reduced-Form Estimates for the IV Model
********************************************************************************
* coefficients, standard errors and confidence intervals; time dummies and
* the constant are not shown (add style(tex) for LaTeX output)
estout did_fs did_rf iv, ///
	cells(b(fmt(3) star) se(fmt(3)) ci(fmt(3))   ) ///
	starlevels(* 0.05 ** 0.01) ///
	stats(N N_clust, fmt(%9.0fc %9.0fc)) ///
	drop(*time _cons) ///
	varwidth(12)
	
	
	
	
	
***********************************************************************************
* Table 5: Estimates of the Effect of Energy Information Disclosure on House Prices
*********************************************************************************** 
* The global cond holds the complete if-qualifier of the respective subsample.

***
* Robustness: IV
***

* excluding newly renovated homes (R3, C8c):
* keep homes with modern before 2013 or with missing modern
global cond if (modern<2013)|missing(modern) 
quietly xtivreg logkaufp i.time (epc = bankspost) $cond, fe vce(cluster geo) first
estimates store iv_modern

* other energy performance certificates (eec coded 99)
global cond if eec==99 
quietly xtivreg logkaufp i.time (epc = bankspost) $cond, fe vce(cluster geo) first
estimates store iv_newepc

* fuzzy RD regression: only April and May 2014, postmai as instrument for epc
xtivreg logkaufp (epc = postmai) if inrange(time, ym(2014, 4), ym(2014, 5)), fe vce(cluster geo)
estimates store fuzzy_rd


***
* Robustness: Panel
***

* excluding newly renovated homes (R3, C8c)
global cond if (modern<2013)|missing(modern) 
quietly xtreg logkaufp epc i.time $cond , fe vce(cluster geo)
estimates store panel_modern

* Energy performance certificates without classes (eec coded 99)
global cond if eec==99 
quietly xtreg logkaufp epc i.time $cond , fe vce(cluster geo)
estimates store panel_newepc


* matching DiD
* All changes to the data are made between preserve and restore and are
* therefore undone after the estimation.
preserve
	* add the matching results and keep matched observations only
	merge 1:1 geo time using "$WORKDATA/result_psmatch.dta", update 
	keep if !missing(matchid)
	drop _merge

	* create variables for both the treated and matched controls:
	* eec and modern of the complier are assigned to all observations of the match
	gen complier_val = eec if complier==1
	bysort matchid: egen eec_matchid = max(complier_val)
	drop complier_val
	gen complier_val = modern if complier==1
	bysort matchid: egen modern_matchid = max(complier_val)
	drop complier_val

	** keep only when a month is available for both matched obs
	bysort matchid time: gen n_pair_month = _N 
	keep if n_pair_month == 2

	* DiD: 
	xtset geo time
	distinct geo 
	xtreg logkaufp epc i.time, fe vce(cluster matchid)
	estimates store panel_mdid
restore



** Anticipation effects

* omit observations that have disclosed between October 2013 and April 2014;
* the global antcond holds the condition without the leading "if"
global antcond !inrange(epc_change_time,monthly("2013 10","YM"),monthly("2014 04","YM"))

xtset geo time

* panel (DiD) estimate
xtreg logkaufp epc i.time if $antcond, fe vce(cluster geo)
estimates store panel_anticip

* IV estimate
quietly xtivreg logkaufp i.time (epc = bankspost) if $antcond, fe vce(cluster geo) first
estimates store iv_anticip

***
* results for paper
***
* Time dummies and the constant are not shown (add style(tex) for LaTeX output).

* panel estimates
estout panel_modern panel_newepc panel_anticip panel_mdid, ///
	cells("b(fmt(3) star) se(fmt(3))"   ) ///
	starlevels(* 0.05 ** 0.01) ///
	stats(N N_g, fmt(%9.0fc %9.0fc)) ///
	drop(*time _cons) ///
	varwidth(12)

* IV estimates and fuzzy RD
estout iv_modern iv_newepc iv_anticip fuzzy_rd, ///
	cells("b(fmt(3) star) se(fmt(3))") ///
	starlevels(* 0.05 ** 0.01) ///
	stats(N N_clust, fmt(%9.0fc %9.0fc)) ///
	drop(*time _cons) ///
	varwidth(30)

	
	

***********************************************************************************
* Table 6: Falsification Tests Using Placebo-Treatments
*********************************************************************************** 
* All changes to the data are made between preserve and restore.

preserve

	* add the matching results and keep matched observations only
	merge 1:1 geo time using "$WORKDATA/result_psmatch.dta", update 
	keep if !missing(matchid)
	drop _merge

	** keep only when a month is available for both matched obs
	bysort matchid time: gen n_pair_month = _N 
	keep if n_pair_month == 2

	sort matchid geo time_disclosure

	* f_k  = 1 if time_disclosure equals -k (k = 1,...,12)
	* fc_k = f_k interacted with the complier dummy
	forvalues lead = 1/12 {
		capture drop f_`lead'
		capture drop fc_`lead'
		gen f_`lead' = (time_disclosure == -`lead')
		gen fc_`lead' = f_`lead'*complier
	}

	* the regression uses the dummies and interactions for k = 1,...,5
	xtset geo time
	xtreg logkaufp i.time epc f_1 fc_1 f_2 fc_2 f_3 fc_3 f_4 fc_4 f_5 fc_5, fe vce(cluster matchid)
	estimates store did_falsific

	* only epc and the interactions fc_k are shown
	* (ci(fmt(3)) can be added to cells(); add style(tex) for LaTeX output)
	estout did_falsific, ///
	cells("b(fmt(3) star) se(fmt(3))") ///
	starlevels(* 0.05 ** 0.01) ///
	stats(N N_g, fmt(%9.0fc %9.0fc)) ///
	drop(*time _cons f_*) ///
	varwidth(12)
restore	
	

	
***************************
* Table 8 (Online Appendix I): Estimates that control for duration 
***************************
* Same specifications as in Table 4 with i.duration_kat as additional control.
* The stored estimates ols, did, did_bau, iv and iv_bau are overwritten.

* OLS regression
*******************

xtset plz
xtreg logkaufp epc $merkat_i $merkont ib1.objektzus ib5.anbietertyp i.time i.duration_kat if geolast, fe vce(robust)
estimates store ols


* DiD regressions
*******************

xtset geo time

* DiD: 
quietly xtreg logkaufp epc i.time i.duration_kat, fe vce(cluster geo)
estimates store did

* DiD: heterogeneity by year of construction
quietly xtreg logkaufp epc epc_bau_1977 epc_bau_1900 i.time i.duration_kat, fe vce(cluster geo)
estimates store did_bau


* IV regressions
*****************

* IV: Parsimonious Specification
quietly xtivreg logkaufp i.time i.duration_kat (epc = bankspost), fe vce(cluster geo) first
estimates store iv

* IV regressions: heterogeneity by year of construction (interaction effects)
quietly xtivreg logkaufp i.time i.duration_kat (epc_bau_1900 epc_bau_1977 epc = bankspost_bau_1900 bankspost_bau_1977 bankspost), fe vce(cluster geo)
estimates store iv_bau


** results for paper (add style(tex) for LaTeX output)
estout ols did did_bau iv iv_bau, ///
	cells( "b(fmt(3) star) se(par fmt(3))" ) ///
	starlevels(* 0.05 ** 0.01) ///
	stats(N N_clust, fmt(%9.0fc %9.0fc)) ///
	keep(epc epc*) ///
	varwidth(12) substitute(_ \_)

* close the main log (it was only opened when LOGTIME == 1)
if ($LOGTIME == 1)	log close	




